-
Notifications
You must be signed in to change notification settings - Fork 89
Update tutorials.rst #1083
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Update tutorials.rst #1083
Conversation
Add `--minlen 1` to the `funannotate sort` command because otherwise it throws error
hi so code has already been fixed in the sort routine to default to minlen=0 -- what version are you using of funannotate? |
there's no harm in adding this to the tutorial though. |
I am using |
you can check if which has |
Hi, Despite the fact that it has Here's the output of my time funannotate sort -i 1318_nanopore_r10_flye.genome.cleaned.fasta -b scaffold -o 1318_nanopore_r10_flye.genome.cleaned.sorted.fasta
48 contigs records loaded
Sorting and renaming contig headers
Traceback (most recent call last):
File "/home/intelliyeast/micromamba/envs/funannotate/bin/funannotate", line 10, in <module>
sys.exit(main())
File "/home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/funannotate.py", line 717, in main
mod.main(arguments)
File "/home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/sort.py", line 80, in main
SortRenameHeaders(
File "/home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/sort.py", line 37, in SortRenameHeaders
if minlen > 0:
TypeError: '>' not supported between instances of 'NoneType' and 'int'
real 0m0.197s
user 0m0.569s
sys 0m1.162s And here's the cat /home/intelliyeast/micromamba/envs/funannotate/lib/python3.8/site-packages/funannotate/sort.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import sys
import argparse
from Bio.SeqIO.FastaIO import SimpleFastaParser
from funannotate.library import countfasta, softwrap
def SortRenameHeaders(input, basename, output, minlen=0, simplify=False):
Seqs = []
with open(input, "r") as infile:
for header, sequence in SimpleFastaParser(infile):
Seqs.append((header, len(sequence), sequence))
# sort by length
sortedSeqs = sorted(Seqs, key=lambda x: x[1], reverse=True)
# loop through and return contigs and keepers
counter = 1
with open(output, "w") as outfile:
for name, length, seq in sortedSeqs:
if simplify: # try to just split at first space
if " " in name:
newName = name.split(" ")[0]
else:
newName = name
else:
newName = f"{basename}_{counter}"
if len(newName) > 16:
print(
f"Error. {newName} fasta header too long.",
"Choose a different --base name.",
"NCBI/GenBank max is 16 characters.",
)
raise SystemExit(1)
if minlen > 0:
if length >= minlen:
# ony write if length
outfile.write(">{:}\n{:}\n".format(newName, softwrap(seq)))
else:
# always write if we aren't filtering by length
outfile.write(">{:}\n{:}\n".format(newName, softwrap(seq)))
counter += 1
def main(args):
# setup menu with argparse
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
def __init__(self, prog):
super(MyFormatter, self).__init__(prog, max_help_position=48)
parser = argparse.ArgumentParser(
prog="sort_rename.py",
usage="%(prog)s [options] -i genome.fa -o sorted.fa",
description="Script that sorts input by length and then renames contig headers.",
epilog="""Written by Jon Palmer (2016) [email protected]""",
formatter_class=MyFormatter,
)
parser.add_argument("-i", "--input", required=True, help="Multi-fasta genome file")
parser.add_argument("-o", "--out", required=True, help="Cleaned output (FASTA)")
parser.add_argument(
"-b", "--base", default="scaffold", help="Basename of contig header"
)
parser.add_argument(
"-s",
"--simplify",
action="store_true",
help="Try to simplify headers, split at first space",
)
parser.add_argument(
"-m", "--minlen", type=int, help="Contigs shorter than threshold are discarded"
)
args = parser.parse_args(args)
print(("{:,} contigs records loaded".format(countfasta(args.input))))
print("Sorting and renaming contig headers")
if args.minlen:
print(("Removing contigs less than {:} bp".format(args.minlen)))
SortRenameHeaders(
args.input, args.base, args.out, minlen=args.minlen, simplify=args.simplify
)
print(("{:,} contigs saved to file".format(countfasta(args.out))))
if __name__ == "__main__":
main(sys.argv[1:]) |
The current instructions for
funannotate sort
in the tutorial throw the following error:This is because the minlen variable is not being set currently. Therefore, the current version of the code necessitates
--minlen 1
argument.